#!/usr/bin/env python
#-*- coding:utf-8 -*-

# file:extract_keywords.py
# author:张世航
# datetime:2024/11/9 17:45
# software: PyCharm
"""
Read a Chinese text file, segment it with jieba, and print its most frequent words as keywords.
"""
# Import the modules you need

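# Exercise 7: for a Chinese document, use the jieba library to run a word-frequency analysis and find the article's keywords (the 10 most frequent words are taken as the keywords).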
#
import jieba
from collections import Counter


def get_text(file_path):
    """Read a UTF-8 text file and return its contents as a string.

    The file is decoded with ``utf-8-sig`` so that a leading byte-order
    mark, if present, is dropped instead of ending up as a stray
    ``"\\ufeff"`` character at the start of the text. Files without a BOM
    are decoded exactly as plain UTF-8.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The decoded file contents, or ``""`` when the file cannot be
        opened or decoded (the error is printed, not raised).
    """
    try:
        with open(file_path, "r", encoding="utf-8-sig") as file:
            return file.read()
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers
        # UnicodeDecodeError and malformed paths. Programming errors such
        # as passing a non-path object are left to propagate.
        print(f"发生错误: {e}")
        return ""


def extract_keywords(text, top_n=10, min_length=2):
    """Return the most frequent meaningful words in ``text``.

    The text is segmented with ``jieba.cut`` and the resulting tokens are
    counted. Tokens that cannot be keywords are discarded before counting:
    whitespace, tokens made only of punctuation/symbols, and tokens shorter
    than ``min_length`` characters (by default, single characters, which in
    Chinese are mostly particles such as "的" or "了").

    Args:
        text: The text to analyse.
        top_n: Maximum number of words to return. Defaults to 10.
        min_length: Minimum token length, in characters, for a token to be
            counted. Defaults to 2; pass 1 to keep single-character words.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent, at most ``top_n`` long. Empty when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing to segment: skip the tokenizer entirely.
    if not text or not text.strip():
        return []

    word_counts = Counter(
        token
        for token in (raw.strip() for raw in jieba.cut(text))
        # Require the minimum length and at least one letter/digit so that
        # punctuation runs such as "……" or "——" are not counted as words.
        if len(token) >= min_length and any(ch.isalnum() for ch in token)
    )

    return word_counts.most_common(top_n)


def main(file_path="chinese_article"):
    """Print the most frequent words of the text file at ``file_path``.

    Args:
        file_path: Path of the Chinese text file to analyse. Defaults to
            the placeholder name ``"chinese_article"``, so calling
            ``main()`` without arguments reads the same file as before.
    """
    text = get_text(file_path)  # returns "" when the file cannot be read
    if not text.strip():
        # Empty or unreadable input: report it rather than printing the
        # heading with no keywords underneath.
        print("未读取到任何文本内容，无法提取关键词。")
        return

    keywords = extract_keywords(text)

    print("文章的关键词（前10个词语）：")
    for word, count in keywords:
        print(f"{word}: {count}")


# Run the keyword extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()